﻿from urllib import request
import re
# Fetch one page of the Qiushibaike "hot" listing and print, for every regex
# match, the text of the <h2> element followed by the text of the next <span>.
page = 1
url = "http://www.qiushibaike.com/hot/page/" + str(page)

# Seconds to wait on the network before giving up. Without an explicit
# timeout, urlopen() may block indefinitely on an unresponsive server.
timeout = 10

req = request.Request(url)
# Send a browser-style User-Agent instead of urllib's default one
# (presumably the site rejects the default -- TODO confirm).
req.add_header(
    "User-Agent",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36",
)

# Two capture groups: (1) the contents of the <h2> inside the
# "author clearfix" div, (2) the contents of the first <span> after it.
pattern = r'<div.*?author clearfix">.*?<a.*?<a.*?<h2>(.*?)</h2>.*?<span>(.*?)</span>.*?'
# re.S makes "." match newlines as well, so one match can span several lines.
pa = re.compile(pattern, re.S)

# The with-statement closes the HTTP response automatically, even on error.
with request.urlopen(req, timeout=timeout) as f:
    print('Status:', f.status, f.reason)
    html = f.read().decode("utf-8")
    items = pa.findall(html)
    for item in items:
        # item[0] is the <h2> text (presumably the author name) and
        # item[1] is the <span> text (presumably the post body).
        print(item[0])
        print(item[1], "\n")


# pattern = '<div.*?author clearfix">.*?<a.*?<a.*?<h2>(.*?)</h2>'
# pa = re.compile(pattern,re.S)
# if re.findall(pa,str):
	# print("ok")
# else:
	# print("false")
# items = re.findall(pa,str)
# print(items)
